12wk-045: 아이스크림 / 부스팅

Author

최규빈

Published

November 21, 2023

1. 강의영상

2. Imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.tree
import sklearn.ensemble
#---#
import warnings
warnings.filterwarnings('ignore')
#---#
import matplotlib.animation
import IPython

3. Data

# Build the training data: 80 sorted temperatures and noisy ice-cream sales
# generated from the linear model sales = 20 + 2.5*temp + eps.
np.random.seed(43052)
url = 'https://raw.githubusercontent.com/guebin/DV2022/master/posts/temp.csv'
temp = pd.read_csv(url).iloc[:, 3].to_numpy()[:80]
temp.sort()
eps = np.random.randn(80) * 3  # Gaussian noise, sd = 3
icecream_sales = 20 + 2.5 * temp + eps
df_train = pd.DataFrame({'temp': temp, 'sales': icecream_sales})
df_train
temp sales
0 -4.1 10.900261
1 -3.7 14.002524
2 -3.0 15.928335
3 -1.3 17.673681
4 -0.5 19.463362
... ... ...
75 9.7 50.813741
76 10.3 42.304739
77 10.6 45.662019
78 12.1 48.739157
79 12.4 46.007937

80 rows × 2 columns

4. 적합

## step1 -- design matrix (2-D) and target (1-D)
X = df_train[['temp']]
y = df_train['sales']
## step2 -- gradient boosting regressor, learning rate 0.1
predictr = sklearn.ensemble.GradientBoostingRegressor(learning_rate=0.1)
## step3 / step4 -- fit and predict (fit returns the estimator, so we chain)
yhat = predictr.fit(X, y).predict(X)
plt.plot(X, y, 'o')      # raw data
plt.plot(X, yhat, '--')  # boosted fit

5. yhat을 얻는과정

# Peek at one weak learner: estimators_ is indexed [stage][0], so this is the
# regression tree from boosting stage 1 (the second stage).
predictr.estimators_[1][0]
DecisionTreeRegressor(criterion='friedman_mse', max_depth=3,
                      random_state=RandomState(MT19937) at 0x7FAFC065B440)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Collect every weak learner and stack their raw (unscaled) predictions
# into a (n_trees, n_samples) array.
trees = [stage[0] for stage in predictr.estimators_]
tree_outputs = [tree.predict(X) for tree in trees]
predictions = np.stack(tree_outputs)
plt.plot(predictions[0])  # prediction of decision tree 0
plt.plot(predictions[1])  # prediction of decision tree 1
plt.plot(predictions[2])  # prediction of decision tree 2

# Raw sum of the first two trees' predictions (not yet scaled by the learning rate).
predictions[:2].sum(axis=0)
array([-28.12857773, -28.12857773, -28.12857773, -28.12857773,
       -28.12857773, -28.12857773, -28.12857773, -28.12857773,
       -28.12857773, -17.76744106, -17.76744106, -17.76744106,
       -17.76744106, -17.76744106, -17.76744106, -17.76744106,
       -17.76744106, -17.76744106, -17.76744106,  -8.07411483,
        -8.07411483,  -8.07411483,  -8.07411483,  -8.07411483,
        -8.07411483,  -8.07411483,  -8.07411483,  -8.07411483,
        -1.82748008,  -1.82748008,  -1.82748008,  -1.82748008,
        -1.82748008,  -1.82748008,  -1.82748008,  -1.82748008,
        -1.82748008,  -1.82748008,  -1.82748008,  -1.82748008,
        -1.82748008,  -1.82748008,  -1.82748008,  -1.82748008,
        -1.82748008,   6.48143774,   6.48143774,   6.48143774,
         6.48143774,   6.48143774,   6.48143774,   6.48143774,
         6.48143774,   6.48143774,   6.48143774,   6.48143774,
        11.74216296,  11.74216296,  11.74216296,  11.74216296,
        11.74216296,  11.74216296,  11.74216296,  11.74216296,
        19.21655361,  19.21655361,  19.21655361,  19.21655361,
        19.21655361,  19.21655361,  19.21655361,  19.21655361,
        19.21655361,  19.21655361,  29.52785838,  29.52785838,
        29.52785838,  29.52785838,  29.52785838,  29.52785838])
# Show how the fit improves as scaled weak learners are added to the mean:
# prediction after k trees = mean(y) + 0.1 * sum of the first k tree outputs.
plt.plot(X, y, 'o', alpha=0.5, label='RawData')
plt.plot(X, y*0 + y.mean(), '--', label='MeanPredictor')
for k in (1, 2, 3):
    partial_fit = predictions[:k].sum(axis=0)*0.1 + y.mean()
    plt.plot(X, partial_fit, '--', label=f'WeakPredictor (ver 0.0{k})')
plt.plot(X, predictions.sum(axis=0)*0.1 + y.mean(), '--', label='WeakPredictor (ver 1.00)')
plt.legend()
<matplotlib.legend.Legend at 0x7faf01a4dea0>

- 애니메이션으로 표현해보자.

fig = plt.figure()  # blank figure; each animation frame redraws onto it
<Figure size 640x480 with 0 Axes>
def func(i):
    """Draw animation frame i: raw data plus the boosted fit after i weak learners."""
    ax = fig.gca()
    ax.clear()
    ax.plot(X, y, 'o', alpha=0.5, label='RawData')
    # Bug fix: label read "WeekPredictor"; every other plot uses "WeakPredictor".
    ax.plot(X, predictions[0:i].sum(axis=0)*0.1 + y.mean(), '--',
            label=f'WeakPredictor (ver {(i+1)/100:.2f})')
    ax.legend()
ani = matplotlib.animation.FuncAnimation(
    fig = fig,
    func = func,
    frames = 100  # one frame per boosting stage
)
display(IPython.display.HTML(ani.to_jshtml()))

6. 재현

A. 재현의 확인

- 아이디어: - 처음부터 yhat을 강하게 학습하지 말고 약하게 조금씩 학습하자. - 그리고 부족한 부분(= 학습이 안된 부분 = y-yhat)을 조금씩 강화시키며 보완하자.

# Hand-rolled gradient boosting: repeatedly fit a depth-3 tree to the current
# residual, then absorb only 10% of its prediction (learning rate = 0.1).
trees_manual = []
predictions_manaul = []   # (sic) misspelling kept -- later cells use this name
residuals_manual = []
res = y - y.mean()        # start from the residual around the mean predictor
for _ in range(100):
    tree = sklearn.tree.DecisionTreeRegressor(
        criterion='friedman_mse',
        max_depth=3
    )
    tree.fit(X, res)
    yhat = tree.predict(X)
    # Don't learn everything at once -- keep just 10% of what this tree learned;
    # 0.1 is the "learning rate".
    res = res - 0.1*yhat
    trees_manual.append(tree)
    predictions_manaul.append(yhat)
    residuals_manual.append(res)
predictions_manaul = np.stack(predictions_manaul)
residuals_manual = np.stack(residuals_manual)

- 비교

fig, ax = plt.subplots(2, 2)
plt.close()
def func(i):
    """Frame i: sklearn's boosting (left column) vs. the manual loop (right column)."""
    # Top row: partial fits after i trees, for each implementation.
    for col, preds in ((0, predictions), (1, predictions_manaul)):
        ax[0, col].clear()
        ax[0, col].plot(X, y, 'o', alpha=0.5)
        ax[0, col].plot(X, preds[0:i].sum(axis=0)*0.1 + y.mean(), '--')
    # Bottom row: root split (max_depth=0) of the i-th tree from each version.
    sklearn.tree.plot_tree(trees[i], max_depth=0, ax=ax[1, 0])
    sklearn.tree.plot_tree(trees_manual[i], max_depth=0, ax=ax[1, 1])
ani = matplotlib.animation.FuncAnimation(
    fig,
    func,
    frames = 20
)
display(IPython.display.HTML(ani.to_jshtml()))

B. Step별 분석

fig, ax = plt.subplots(1, 4, figsize=(10, 3))
plt.close()
def func(i):
    """Frame i: one boosting iteration shown as four panels."""
    # Step0 -- the data and the fit accumulated so far.
    ax[0].clear()
    ax[0].plot(X, y, 'o', alpha=0.5)
    ax[0].plot(X, predictions[0:i].sum(axis=0)*0.1 + y.mean(), '--')
    ax[0].set_title("Step0")
    # Step1 -- what remains to be learned (the residual).
    ax[1].clear()
    ax[1].set_ylim(-20, 20)
    ax[1].plot(X, residuals_manual[i], 'o', alpha=0.5)
    ax[1].set_title("Step1:Residual")
    # Step2 -- the new tree fitted to that residual.
    ax[2].clear()
    ax[2].set_ylim(-20, 20)
    ax[2].plot(X, residuals_manual[i], 'o', alpha=0.5)
    ax[2].plot(X, predictions[i], '--')
    ax[2].set_title("Step2:Fit")
    # Step3 -- the fit before (C1) and after (C3) adding 10% of the new tree.
    ax[3].clear()
    ax[3].plot(X, y, 'o', alpha=0.5)
    ax[3].plot(X, predictions[:i].sum(axis=0)*0.1 + y.mean(), '--', color='C1')
    ax[3].plot(X, predictions[:(i+1)].sum(axis=0)*0.1 + y.mean(), '--', color='C3')
    ax[3].set_title("Step3:Update")
ani = matplotlib.animation.FuncAnimation(
    fig,
    func,
    frames = 20
)
display(IPython.display.HTML(ani.to_jshtml()))
  • 관찰1: “Step1: Residual”은 점점 단순오차처럼 변화한다.
  • 관찰2: “Step2: Fit”의 분기점들은 고정된 값이 아니다. (계속 변한다)
  • 관찰3: “Step3: Update”에서 업데이트되는 양은 반복이 진행될수록 점점 작아진다.

- 위의 그림에서

  • Step0: 공부할 자료, 현재까지 공부량
  • Step1: 남은 공부량
  • Step2: 공부! (이해O / 암기X)
  • Step3: 공부의 10%의 기억.. 기억나는 것만 두뇌에 update되어있음.

- 느낌: 조금씩 데이터를 학습한다. 학습할 자료가 오차항처럼 보인다면? 그때는 적합을 멈춘다. (오차항을 적합할 필요는 없잖아?)